# Train a Word2Vec model on merged, shuffled English/Dutch Europarl documents.
import gensim
from gensim.models import word2vec
import os
import random

# Build a bilingual (English + Dutch) training corpus from the Europarl
# documents and train a Word2Vec model on it.
#
# The English and Dutch folders contain files with identical names, so the
# file names only need to be listed from one of the two folders.
corpus_dir = 'G:/corpus/first_step/Europarl remove_stop/'
en_dir = os.path.join(corpus_dir, 'en')
nl_dir = os.path.join(corpus_dir, 'nl')
en_filename = os.listdir(en_dir)

# Context window used for training; also the length threshold for documents.
WINDOW = 60
# Dimensionality of the learned word vectors.
VECTOR_SIZE = 600


def _read_tokens(path):
    """Return the whitespace-separated tokens of the UTF-8 text file at *path*."""
    with open(path, 'r', encoding='utf-8') as fh:
        return fh.read().split()


sentences = []
for file in en_filename:
    # Read both language versions from the same base directory that was
    # listed above, so the script does not depend on the working directory.
    enword_union = _read_tokens(os.path.join(en_dir, file))
    nlword_union = _read_tokens(os.path.join(nl_dir, file))
    enword_union.extend(nlword_union)
    # Because the window is WINDOW tokens wide, merged documents with WINDOW
    # or fewer tokens are dropped from the training set.
    if len(enword_union) > WINDOW:
        # Shuffle the merged tokens in place.
        # NOTE(review): presumably done so English and Dutch words share
        # context windows -- confirm the intent.
        random.shuffle(enword_union)
        sentences.append(enword_union)


# gensim 4.0 renamed the `size` keyword to `vector_size`; choose the keyword
# that matches the installed version so the script runs on both.
if int(gensim.__version__.split('.')[0]) >= 4:
    model = word2vec.Word2Vec(sentences, vector_size=VECTOR_SIZE, window=WINDOW)
else:
    model = word2vec.Word2Vec(sentences, size=VECTOR_SIZE, window=WINDOW)

model.save('we_vs9.model')


